* ---------------------------------------------------------------------------
* Session setup
* ---------------------------------------------------------------------------
capture log close
clear
set more off
set virtual on

log using regression_analysis_empins, replace

* Purpose: restrict the individuals in the public sample to those with some
* form of employer-provided coverage, then compare the estimated across-MSA
* variance between the private and public samples.

* Temporary files: basedat holds the analysis sample, rfedat accumulates the
* MSA-level regression output.
tempfile basedat
tempfile rfedat

* Outcome lists. The combined list is the expenditure outcomes followed by the
* utilization outcomes, in that order.
global varlist_ex "tot_med inpat_exp outpat_exp rx_exp"
global varlist_ut "no_hosp days_hosp outpat_vis rx_30de"
global varlist "$varlist_ex $varlist_ut"


* ---------------------------------------------------------------------------
* Build the analysis sample
* ---------------------------------------------------------------------------
use analytic_file, clear

* Discard the MSA count and MSA dummies shipped with the file; both are rebuilt
* below on the restricted sample.
drop msa_count MSA*

* Express the expenditure outcomes in thousands of dollars
foreach spendvar of global varlist_ex {
    replace `spendvar' = `spendvar'/1000
}

* Attach the employer-coverage flag. Rows that exist only in emp_cov.dta are
* dropped, as are rows flagged emp_cov==0. Rows whose emp_cov is missing
* (for example, rows with no match in emp_cov.dta) are NOT removed by this
* condition.
sort id
merge id using emp_cov.dta
drop if _merge==2 | emp_cov==0

* The restricted samples are much smaller, so thin MSAs are removed.
* msa_count is the number of non-missing year values within each (mcbs, msa)
* cell; mincount is the smaller of an MSA's cell counts. Only MSAs whose
* smallest cell has more than 50 observations are retained.
* NOTE(review): this step was previously described as removing MSAs with
* "25 or fewer individuals", but the cutoff in the code is 50 - confirm which
* threshold is intended.
egen msa_count = count(year), by(mcbs msa)
egen mincount = min(msa_count), by(msa)
drop if mincount<=50

* One indicator per remaining MSA (MSA1, MSA2, ...). num_msas is the number of
* MSAs; num_coeff is the number of MSA dummies entering the regressions once
* one MSA is left out as the reference category.
tabulate msa, generate(MSA)
global num_msas=r(r)
global num_coeff=$num_msas-1

* Number of MSAs left in the sample:
display $num_msas

save "`basedat'", replace

** Descriptive statistics for the analysis sample
* Each table is produced twice: first for the private sample (mcbs==0), then
* for the public sample (mcbs==1).

* Utilization outcomes
forvalues pub = 0/1 {
    tabstat $varlist_ut if mcbs==`pub', statistics(mean sd p25 p50 p75) columns(statistics) format(%9.2fc)
}

* Spending outcomes
* NOTE(review): spending is shown with zero decimals (%9.0fc); confirm that is
* still the desired precision if these variables are held in thousands.
forvalues pub = 0/1 {
    tabstat $varlist_ex if mcbs==`pub', statistics(mean sd p25 p50 p75) columns(statistics) format(%9.0fc)
}


** REGRESSION ESTIMATES **

* Step 1: Regression
*
* For every outcome in $varlist, the same specification is estimated twice:
* once on the private sample (mcbs==0) and once on the public sample
* (mcbs==1). From each regression we keep
*   - the MSA dummy coefficients (the omitted MSA1 is assigned 0),
*   - their estimated variances (diagonal of e(V); 0 for MSA1),
*   - the sum of every element of the MSA-dummy block of e(V).
* The MSA dummies are listed first among the regressors, so they occupy the
* first $num_coeff rows/columns of e(V).
*
* The results are stored as one row per MSA (msa = 1..$num_msas, the index of
* the dummy) and stacked across outcomes in the rfedat tempfile.
*
* Scalars left behind for each outcome (names unchanged):
*   <y>_e#, <y>_v#, <y>_priv_cv     private coefficients / variances / sum
*   <y>_pe#, <y>_pv#, <y>_pub_cv    public  coefficients / variances / sum

* Controls shared by the private and public regressions
local controls age age2 male age_male age2_male income income2 dz_2-dz_30 Y2-Y7

foreach i in $varlist {
    use "`basedat'", clear

    * ---------------- Private regression ----------------
    reg `i' MSA2-MSA$num_msas `controls' if mcbs==0, robust
    matrix A=e(V)

    * Reference MSA: coefficient and variance are zero by construction
    scalar `i'_e1=0
    scalar `i'_v1=0
    forvalues k = 2/$num_msas {
        * Dummy MSA`k' sits in row/column k-1 of e(V)
        local c = `k' - 1
        scalar `i'_e`k' = _b[MSA`k']
        scalar `i'_v`k' = A[`c',`c']
    }

    * Sum of all variances and covariances among the MSA coefficients
    scalar `i'_priv_cv=0
    forvalues col = 1/$num_coeff {
        forvalues row = 1/$num_coeff {
            scalar `i'_priv_cv = scalar(`i'_priv_cv) + A[`row',`col']
        }
    }

    * ---------------- Public regression ----------------
    reg `i' MSA2-MSA$num_msas `controls' if mcbs==1, robust
    matrix A=e(V)

    scalar `i'_pe1=0
    scalar `i'_pv1=0
    forvalues k = 2/$num_msas {
        local c = `k' - 1
        scalar `i'_pe`k' = _b[MSA`k']
        scalar `i'_pv`k' = A[`c',`c']
    }

    scalar `i'_pub_cv=0
    forvalues col = 1/$num_coeff {
        forvalues row = 1/$num_coeff {
            scalar `i'_pub_cv = scalar(`i'_pub_cv) + A[`row',`col']
        }
    }

    * ---------------- Store results as data ----------------
    * Every stored value is a constant, so a single observation is enough;
    * working on one row avoids generating constants over the whole sample.
    keep in 1

    * Values are stored as double so the estimates are not truncated to float
    * precision, and scalar() guarantees the scalar is used even if a variable
    * (or variable abbreviation) with the same name exists.
    forvalues k = 1/$num_msas {
        gen double `i'_MSA`k'       = scalar(`i'_e`k')
        gen double `i'_MSA_v`k'     = scalar(`i'_v`k')
        gen double `i'_MSA_pub`k'   = scalar(`i'_pe`k')
        gen double `i'_MSA_pub_v`k' = scalar(`i'_pv`k')
    }

    gen double `i'_cov     = scalar(`i'_priv_cv)
    gen double `i'_pub_cov = scalar(`i'_pub_cv)

    keep `i'_MSA* `i'_cov `i'_pub_cov

    * One row per MSA; msa is the dummy index 1..$num_msas
    reshape long `i'_MSA `i'_MSA_pub `i'_MSA_v `i'_MSA_pub_v , i(`i'_cov `i'_pub_cov) j(msa)

    * Stack onto the results of earlier outcomes. The accumulator does not
    * exist on the first pass; check for that explicitly so that a genuine
    * append failure on later passes stops the run instead of being ignored.
    capture confirm file "`rfedat'"
    if _rc == 0 {
        append using "`rfedat'"
    }
    save "`rfedat'", replace
}


* Load the stacked MSA-level estimates and compute the baseline corrections
*
* For each outcome and each sample the loop stores, as scalars:
*   observed variance   = squared standard deviation of the MSA coefficients
*   sampling correction = mean coefficient variance
*                         - (mean of the covariance-sum variable) / N^2,
*                         with N the number of non-missing rows
*   corrected variance  = observed variance - sampling correction
* followed by public-minus-private differences and public/private ratios.

use "`rfedat'", clear

foreach y of global varlist {

    * ----- Private sample (summaries are shown in the log) -----
    summarize `y'_MSA
    scalar `y'_sd = r(sd)
    scalar `y'_v = r(sd)^2

    summarize `y'_MSA_v
    scalar v`y' = r(mean)

    summarize `y'_cov
    scalar `y'_samperr = v`y'-(r(mean)/(r(N)^2))
    display `y'_samperr
    scalar `y'_v_cor = `y'_v-`y'_samperr

    * ----- Public sample (summaries suppressed) -----
    quietly summarize `y'_MSA_pub
    scalar `y'_pub_sd = r(sd)
    scalar `y'_pub_v = r(sd)^2

    quietly summarize `y'_MSA_pub_v
    scalar v`y'_pub = r(mean)

    quietly summarize `y'_pub_cov
    scalar `y'_pub_samperr = v`y'_pub-(r(mean)/(r(N)^2))
    display `y'_pub_samperr
    scalar `y'_pub_v_cor = `y'_pub_v-`y'_pub_samperr

    * ----- Public minus private -----
    scalar `y'_var_diff = `y'_pub_v-`y'_v
    scalar `y'_var_diff_cor = `y'_pub_v_cor-`y'_v_cor

    * ----- Public over private -----
    scalar `y'_var_rat = `y'_pub_v/`y'_v
    scalar `y'_var_rat_cor = `y'_pub_v_cor/`y'_v_cor

}


* Collect the ten summary scalars of each outcome into a 1 x 10 row vector
* named <outcome>_A. Column order: observed variance (private, public),
* sampling correction (private, public), corrected variance (private, public),
* observed difference, corrected difference, observed ratio, corrected ratio.
foreach i in $varlist {

    matrix `i'_A = (`i'_v, `i'_pub_v, `i'_samperr, `i'_pub_samperr, `i'_v_cor, `i'_pub_v_cor, `i'_var_diff, `i'_var_diff_cor, `i'_var_rat, `i'_var_rat_cor)

}

* Stack the rows into one table per outcome family. The row order must match
* $varlist_ut / $varlist_ex because those globals supply the row names below.
matrix ut = (no_hosp_A \ days_hosp_A \ outpat_vis_A \ rx_30de_A)
matrix ex = (tot_med_A \ inpat_exp_A \ outpat_exp_A \ rx_exp_A)

* Both tables share the same column labels
local cols var_obs var_obs_pub var_corection var_corection_pub var_cor var_cor_pub var_diff_obs var_diff_cor var_rat var_rat_cor
matrix colnames ut = `cols'
matrix colnames ex = `cols'
matrix rownames ut=$varlist_ut
matrix rownames ex=$varlist_ex


* DISPLAY RESULTS
* Utilization
matrix list ut, format(%9.5fc)

* Spending
matrix list ex, format(%10.3fc)

* Close the log opened at the top of the script so the log file is flushed and
* released when the run finishes.
capture log close




